set.seed(132)
require(readr)
require(rsample)
require(skimr)
require(tidymodels)
require(kernlab)
require(dplyr)
require(purrr)
require(yardstick)
require(tictoc)
require(parsnip)
require(furrr)
require(gridExtra)
require(grid)
require(mlbench)
require(probably)
require(ROSE)
require(discrim)
require(modelStudio)
require(janitor)
require(DataExplorer)
require(patchwork)
library(ggforce)
require(reshape2)
require(ggplot2)
require(future)
require(tidyverse)
require(future.apply)
require(glmnet)
require(data.table)
require(survival)
require(ranger)
require(prodlim)
require(riskRegression)
require(pec)
require(survminer)
require(ggcorrplot)
require(corrr)
library(doFuture)
# Reshape Data2a to long format for per-feature comparison between the
# sudden cardiac death labels; endpoint columns are dropped beforehand.
PivotData2a <- Data2a %>%
  select(-c(primary_ep, secondary_ep)) %>%
  pivot_longer(-c(sudden_cd, id), names_to = "Parameters", values_to = "Values")
PivotData2a$sudden_cd <- as.factor(PivotData2a$sudden_cd)
# Paginated 4x4 boxplot panels, one facet per parameter. Compute the page
# count from the data instead of hard-coding 10, so the loop never asks
# facet_wrap_paginate for a page that does not exist.
n_pages <- ceiling(dplyr::n_distinct(PivotData2a$Parameters) / 16)
for (i in seq_len(n_pages)) {
  print(
    ggplot(PivotData2a, aes(Values, colour = sudden_cd)) +
      geom_boxplot() +
      facet_wrap_paginate(Parameters ~ ., nrow = 4, ncol = 4, page = i, scale = "free") +
      # theme_bw() must be applied BEFORE theme() tweaks; the original order
      # reset the text-size setting to the theme_bw() default.
      theme_bw() +
      # Title fixed: the geom is a boxplot, not a density plot.
      labs(title = "Boxplot comparison between labels") +
      theme(text = element_text(size = 8))
  )
}
# Assemble the modelling data set in a single pipeline: derive a Yes/No
# Label from sudden_cd (NAs deliberately fall into the "No" branch via the
# case_when catch-all), drop endpoint/identifier columns, keep complete
# cases only, and rename the survival time column to Time.
Data <- Data2a %>%
  mutate(Label = case_when(sudden_cd == "1" ~ "Yes", TRUE ~ "No")) %>%
  select(-c(primary_ep, sudden_cd, secondary_ep, id)) %>%
  drop_na() %>%
  dplyr::rename(Time = time)
# Preprocessing recipe: z-score every numeric column except Time, then
# remove near-zero-variance predictors.
PreProc <- recipe(Label ~ ., data = Data) %>%
  step_normalize(all_numeric(), -Time) %>%
  step_nzv(all_predictors())
PreparedPreProc <- prep(PreProc)
# Pairwise correlations among the numeric predictors, reordered by
# similarity and shaved to the lower triangle for a cleaner plot.
# (correlate() defaults to Pearson with pairwise.complete.obs.)
numeric_data <- Data %>% select_if(is.numeric)
x1 <- numeric_data %>%
  correlate() %>%
  rearrange() %>%
  shave()
##
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
rplot(x1)
## Don't know how to automatically pick scale for object of type noquote. Defaulting to continuous.
# Helper functions for the Cox workflow. NOTE: FunctionsCox2.R assumes
# ggstatsplot is the first package loaded — confirm load order above.
source("FunctionsCox2.R")
numPartitions <- 400  # number of random train/validation permutations
method <- 1           # 1 = classification, 2 = regression, 3 = multiclassification
# glmnet mixing parameters: 1 = LASSO, 0.5 = Elastic Net
# (additional alpha values could be appended here)
alpha <- c(1, 0.5)
All2 <- Data
FinalAll2F <- data.frame(All2)
All2F <- FinalAll2F
## Surv study ----
## Call: survfit(formula = SurvTimes ~ 1)
##
## n events rmean* se(rmean) median 0.95LCL 0.95UCL
## [1,] 371 27 79.5 1.47 NA NA NA
## * restricted mean with upper limit = 87
## Call: survfit(formula = SurvTimes ~ 1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 371 5 0.987 0.00599 0.975 0.998
## 4 364 2 0.981 0.00707 0.967 0.995
## 9 360 1 0.978 0.00756 0.964 0.993
## 10 359 1 0.976 0.00802 0.960 0.991
## 11 358 1 0.973 0.00845 0.957 0.990
## 13 342 4 0.962 0.01008 0.942 0.982
## 14 334 1 0.959 0.01046 0.938 0.979
## 16 320 1 0.956 0.01084 0.935 0.977
## 19 292 2 0.949 0.01172 0.926 0.972
## 21 271 2 0.942 0.01263 0.918 0.967
## 29 188 1 0.937 0.01352 0.911 0.964
## 31 168 1 0.932 0.01455 0.903 0.960
## 35 139 1 0.925 0.01591 0.894 0.957
## 37 119 1 0.917 0.01757 0.883 0.952
## 42 91 1 0.907 0.02006 0.868 0.947
## 70 27 1 0.873 0.03821 0.802 0.952
## 76 14 1 0.811 0.06980 0.685 0.960
## Warning: Strategy 'multiprocess' is deprecated in future (>= 1.20.0). Instead,
## explicitly specify either 'multisession' or 'multicore'. In the current R
## session, 'multiprocess' equals 'multisession'.
## Warning in supportsMulticoreAndRStudio(...): [ONE-TIME WARNING] Forked
## processing ('multicore') is not supported when running R from RStudio
## because it is considered unstable. For more details, how to control forked
## processing or not, and how to silence this warning in future R sessions, see ?
## parallelly::supportsMulticore
FinalAll2F$Label <- as.factor(FinalAll2F$Label)
set.seed(132)
# Generate numPartitions random 85/15 train/validation splits and apply the
# prepared preprocessing recipe to each partition. The sample() call order
# is unchanged so the draws are identical under set.seed(132).
sInitial3 <- lapply(seq_len(numPartitions), function(i) {
  n_train <- floor(0.85 * nrow(FinalAll2F))
  idx <- sample(seq_len(nrow(FinalAll2F)), size = n_train)
  list(
    Model      = bake(PreparedPreProc, new_data = FinalAll2F[idx, ]),
    Validation = bake(PreparedPreProc, new_data = FinalAll2F[-idx, ])
  )
})
# Remove splits in which either partition has 0 or only 1 sample of a class
sInitial3 <- ImbalClass(sInitial3, c("Model", "Validation"), 0)
## [1] "Model"
## [1] "Validation"
# Reformat each split into the layout the EN/LASSO (glmnet) code expects:
# x* = predictor matrix, y* = data.frame(event indicator 0/1, Time).
# The last two columns of the baked data (assumed to be Time and Label —
# confirm against the bake() output) are excluded from the x matrices.
sFS <- lapply(sInitial3, function(split) {
  train <- split[["Model"]]
  test  <- split[["Validation"]]
  # Fix: index each partition by its OWN dimensions. The original used
  # dim(train) for the test set too, which only worked because the recipe
  # happens to yield identical columns in both partitions.
  drop_train <- c(ncol(train), ncol(train) - 1)
  drop_test  <- c(ncol(test), ncol(test) - 1)
  list(
    xtrain = as.matrix(train[, -drop_train]),
    ytrain = data.frame(Label = as.numeric(train$Label) - 1, Time = train$Time),
    xtest  = as.matrix(test[, -drop_test]),
    ytest  = data.frame(Label = as.numeric(test$Label) - 1, Time = test$Time)
  )
})
# Duplicated list: historically a second partition existed for a further
# train/test split of the machine-learning stage.
sTog <- list(sFS, sFS)
a <- sTog
PermutedLength <- lapply(a, length)
print(paste0("Wanted ", numPartitions, " and obtained permuted length of: ",
             PermutedLength[[1]], " in Feature Selection and ",
             PermutedLength[[2]], " in Machine Learning data"))
# Fit penalized Cox models (LASSO or Elastic Net, set by `alpha`) on every
# permuted split and summarize which features survive shrinkage.
#
# a1     : list of splits, each with xtrain (matrix) and ytrain (Time, 0/1 Label)
# alpha  : glmnet mixing parameter (1 = LASSO, 0.5 = Elastic Net)
# method : unused here; kept for interface compatibility with callers
#
# Returns list(sEN, Cval):
#   sEN  - data.frame(Importance, Names): per-feature count of splits where
#          the coefficient is non-zero (zero-only features removed)
#   Cval - training-set C-index of each split's fitted model
LassoEn1NewCox <- function(a1, alpha, method){
  # Cross-validate lambda per split; future.seed gives reproducible parallel RNG
  aa <- future_lapply(seq_along(a1), function(i) {
    cv.glmnet(a1[[i]]$xtrain, Surv(a1[[i]]$ytrain$Time, a1[[i]]$ytrain$Label),
              family = "cox", maxit = 1000, alpha = alpha)
  }, future.seed = 132)
  # Refit at the CV-selected lambda.min. Fix: pass future.seed here too —
  # the original omitted it, triggering future_lapply's "unreliable RNG"
  # warnings and potentially non-reproducible parallel RNG streams.
  aa.fit <- future_lapply(seq_along(a1), function(i) {
    glmnet(a1[[i]]$xtrain, Surv(a1[[i]]$ytrain$Time, a1[[i]]$ytrain$Label),
           family = "cox", maxit = 1000, alpha = alpha,
           lambda = aa[[i]][["lambda.min"]])
  }, future.seed = TRUE)
  # C-index evaluated on the SAME data used to fit (training concordance)
  Cval <- future_sapply(seq_along(aa.fit), function(i) {
    assess.glmnet(aa.fit[[i]],
                  newx = a1[[i]]$xtrain,
                  newy = Surv(a1[[i]]$ytrain$Time, a1[[i]]$ytrain$Label),
                  family = "cox")$C[[1]]
  }, future.seed = TRUE)
  # Count, per feature, in how many splits its coefficient is non-zero.
  # (Simplified: rowSums over the dense beta matrix directly, dropping the
  # discarded data.frame(data.table(...)) intermediate.)
  BetasAndNames <- lapply(seq_along(aa.fit), function(i) {
    betas <- as.matrix(aa.fit[[i]][["beta"]])
    data.frame(Add = rowSums(betas != 0),
               Names = aa.fit[[i]][["beta"]]@Dimnames[[1]])
  })
  sEN <- do.call("rbind", BetasAndNames)
  names(sEN) <- c("Importance", "Names")
  sEN <- sEN[sEN$Importance != 0, ]  # drop never-selected features (sparsity)
  return(list(sEN, Cval))
}
# One horizontal frequency bar chart per feature-selection model
# (EN, LASSO), features ordered by selection frequency.
pdd4 <- lapply(1, function(j) {
  lapply(1:2, function(i) {
    model_freqs <- mEN[[j]][[i]]
    ggplot(model_freqs, aes(reorder(x, +freq), freq, fill = BarLab)) +
      geom_bar(stat = "identity") +
      labs(x = "Features", y = "Frequency") +
      ggtitle(paste0(names(FinalEN[[1]])[i])) +
      theme(axis.text.x = element_text(size = 8),
            legend.position = "none",
            panel.background = element_blank(),
            panel.grid.minor = element_line(colour = "black"),
            axis.line = element_line(colour = "black")) +
      coord_flip()
  })
})
# Combine the per-model feature frequencies into one long data.frame.
ElasticNet <- data.frame(Names = FinalEN[[1]][["EN"]][["Names"]],
                         Freq  = FinalEN[[1]][["EN"]][["Importance"]],
                         Model = "EN")
Lasso <- data.frame(Names = FinalEN[[1]][["LASSO"]][["Names"]],
                    Freq  = FinalEN[[1]][["LASSO"]][["Importance"]],
                    Model = "LASSO")
# Fix: invisible() does NOT silence dplyr's "Joining, by = ..." message
# (the knitted output shows it printed anyway); an explicit `by` does,
# and also documents the intended join keys.
FinalModelSelected <- full_join(ElasticNet, Lasso,
                                by = c("Names", "Freq", "Model"))
FinalModelSelected <- full_join(FinalModelSelected, RfSumFin,
                                by = c("Names", "Freq", "Model"))
FinalModelSelected <- FinalModelSelected %>% mutate_if(is.character, as.factor)
# Make melt's auto-detected id variables explicit (Names, Model), so the
# reshape does not depend on column-type guessing. reshape2 is deprecated;
# consider tidyr::pivot_longer in a future refactor.
FinalModelSelected2 <- melt(FinalModelSelected, id.vars = c("Names", "Model"))
# Dodged bar chart of selection frequency per feature, grouped by model.
FeatsAll <- ggplot(FinalModelSelected2,
                   aes(reorder(Names, -value), value, group = Model, fill = Model)) +
  geom_bar(stat = "identity", width = .7, position = "dodge") +
  labs(title = paste0("Selected features"),
       y = (sprintf(" Frequency & Ranking (out of %i)", PermutedLength[[1]])),
       x = ("Features")) +
  theme(axis.text.x = element_text(size = 10, angle = 90),
        panel.background = element_blank(),
        panel.grid.minor = element_line(colour = "black"),
        axis.line = element_line(colour = "black"))
# Save the combined frequency chart (15 x 7 in PDF)
pdf("FeatsAll.pdf", 15, 7)
print(FeatsAll)
dev.off()
## quartz_off_screen
## 2
## Warning: Removed 4543 rows containing missing values (geom_bar).
# Per-model panels assembled side by side via patchwork; shown on screen
# first, then written to a 15 x 18 in PDF.
Lass <- pdd4[[1]][[1]]
En <- pdd4[[1]][[2]]
(Lass + En + pdd4RF)
pdf("Models.pdf", 15, 18)
print(Lass + En + pdd4RF)
dev.off()
## quartz_off_screen
## 2
## quartz_off_screen
## 2
## [1] "homogeneity_1" "lbp_19" "moment1" "var"
## [5] "contrast_1" "correlation_1" "energy_1" "lgre_3"
## [9] "lre_1" "gln_1" "hgre_1"
# References:
# https://thomaselove.github.io/432-notes/cox-regression-models-for-survival-data-example-2.html
# https://stats.stackexchange.com/questions/48298/computing-c-index-for-an-external-validation-of-a-cox-ph-model-with-r
# https://stats.stackexchange.com/questions/444208/cox-ph-r-rms-package-concordance-train-cv-test
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:parsnip':
##
## translate
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## Attaching package: 'rms'
## The following object is masked _by_ '.GlobalEnv':
##
## Design
## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.
## Warning in melt(Single): The melt generic in data.table has been passed a
## data.frame and will attempt to redirect to the relevant reshape2 method;
## please note that reshape2 is deprecated, and this redirection is now
## deprecated as well. To continue using melt methods from reshape2 while both
## libraries are attached, e.g. melt.list, you can prepend the namespace like
## reshape2::melt(Single). In the next version, this warning will become an error.
## No id variables; using all as measure variables
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[rowhead-fg]
## 2 2 (1-1,1-1) arrange text[GRID.text.66844]
## quartz_off_screen
## 2
## [1] "lre_1" "gln_1" "hgre_1" "lgre_3"
## [5] "lbp_19" "contrast_1" "correlation_1" "energy_1"
## [9] "homogeneity_1" "var" "moment1"
## Performance_LASSO_Mean Performance_LASSO_SD Performance_EN_Mean
## 1 0.6969878 0.06172307 0.7079199
## Performance_EN_SD PredictionError_RF_Mean PredictionError_RF_SD
## 1 0.0482745 0.4577323 0.0481219